***************
* Title: gambia_ecd_edcc_table3.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: ECD_3to6_Gambia_cleanv1.dta
* Outputs: gambia_ecd_edcc_table3.[txt/out], gambia_ecd_edcc_table3[a-b].xls
* Notes: creates Table 3
****************
#delimit;
/*SESSION SETUP
	Statements end with a semicolon from here on.
	The start time is stored so it can be displayed next to the finish time at the end of the run.*/
local start=`"$S_TIME"';
clear;
clear matrix;
clear mata;
graph drop _all;
cap log close;	/*cap: no error if no log is open*/
set more off;
/*set directory:
	cd mydir
*/
/*Folder locations, relative to the working directory.
	Forward slashes are used because Stata accepts them on Windows, macOS and Linux,
	whereas backslashes only work on Windows.*/
local data=`"Data/cleaned"';
local output=`"analysis/output"';
/*the log goes to the output folder defined above instead of a second hard-coded copy of that path*/
log using `output'/gambia_ecd_edcc_table3.txt, text replace;

* LOAD AND PREPARE DATA;
qui use `data'/ECD_3to6_Gambia_cleanv1, clear;
/*NEED TO CHECK TREATMENT STATUS OF SETTLEMENT 37028. DISREGARD FOR NOW*/
qui drop if settlement_code==37028;

/*child mother tongue: one 0/1 indicator per language group*/
local tongue_names "mandinka fula jola serahule";
local tongue_codes "1 2 4 5";
forvalues i=1/4 {;
	local nm: word `i' of `tongue_names';
	local cd: word `i' of `tongue_codes';
	qui gen child_`nm'=(child_mothertongue==`cd');
};
/*residual group: code 3 and every code of 6 or above; missing compares as larger than any number, so it lands here too*/
qui gen child_other=(child_mothertongue>=6|child_mothertongue==3);
foreach nm in `tongue_names' other {;
	lab var child_`nm' "child's mother tongue is `nm'";
};

/*hours worked by HH head & mother*/
foreach x in headhh mother {;
	/*copy of reported hours, top-coded at 80*/
	qui gen `x'_hrswrk_topcode=`x'_hrswrk;
	/*missing() also screens out extended missing values (.a to .z); a test of the form !=. lets them through,
		and because they compare as larger than 80 they would be recoded to 80 hours*/
	qui replace `x'_hrswrk_topcode=80 if `x'_hrswrk>80 & !missing(`x'_hrswrk);
	/*one indicator per category of the hours-worked categorical variable, with missing as its own category*/
	qui tab `x'_hrswrk_cat, mi gen(`x'_hrswrk_cat);
};

/*annual per-capita expenditure, winsorized below the 1st and above the 99th percentiles (winsor is a user-written command)*/
/*local-currency (GMD) series*/
winsor expend_yr_hhpc, gen(expend_yr_hhpc_wins) p(0.01);
lab var expend_yr_hhpc_wins "Annual HH expenditure per capita, GMD, winsorized at 1/99 percentiles";
/*USD series*/
winsor expend_yr_hhpc_usd, gen(expend_yr_hhpc_usd_wins) p(0.01);
lab var expend_yr_hhpc_usd_wins "Annual HH expenditure per capita, USD, winsorized at 1/99 percentiles";
/*willingness to pay relative to monthly expenditure: the factor of 12 puts the annual denominator on a monthly basis*/
qui gen ecd_wtppctw_baseline=12*ecd_wtpq_baseline/expend_yr_hhpc_wins;
lab var ecd_wtppctw_baseline "Amount willing to pay for ECD as shr of monthly HH pc expenditure (winsorized) at baseline";

/*SAMPLE GROUPS
	in_baseline	: ip22 is non-missing and not equal to 3
	in_endline	: interview_result equals 1
	in_endline_old	: in endline and in baseline (original sample)
	in_endline_new	: in endline but not in baseline (newly sampled)*/
qui gen in_baseline=(ip22!=. & ip22!=3);
qui gen in_endline=(interview_result==1);
/*both flags above are strictly 0/1, so they can be combined directly as logical values*/
qui gen in_endline_old=(in_baseline & in_endline);
qui gen in_endline_new=(in_endline & !in_baseline);
/*treat unresolved gender mismatches as new to endline*/
qui replace in_endline_new=1 if child_gender_mismatch_resolved==0;
/*anyone counted as new cannot also be classified in the original-sample flag*/
qui replace in_endline_old=. if in_endline_new==1;

/*same groups, additionally requiring a valid MDAT fine motor or language/hearing score*/
/*note that "in sample" defined as having at least one valid MDAT score, not both as in previous analyses of attrition*/
local any_bl_score "!(zfinemotor_baseline==. & zlanghear_baseline==.)";
local any_el_score "!(zfinemotor_endline==. & zlanghear_endline==.)";
/*baseline flag is set to missing for children classified as new at endline*/
qui gen in_baseline_mdat=cond(in_endline_new==1, ., in_baseline==1 & `any_bl_score');
foreach grp in endline endline_old endline_new {;
	qui gen in_`grp'_mdat=(in_`grp'==1 & `any_el_score');
};

/*analysis sample: children aged 3-6 (36 to 83 months) with a valid baseline interview & MDAT score*/
/*a missing age compares as larger than 84, so the second condition removes it as well*/
qui drop if child_age_mths_dob<36 | child_age_mths_dob>=84;
qui drop if in_baseline_mdat!=1;

/*one 0/1 indicator per study arm, keyed on the code stored in treatment*/
local arm_names "communitybased purecontrol ECDAnnex_control ECDAnnex_treated";
local arm_codes "6 1 4 5";
forvalues i=1/4 {;
	local arm: word `i' of `arm_names';
	local code: word `i' of `arm_codes';
	qui gen `arm'=(treatment==`code');
};

/*sample sizes by treatment arm: number of children (freq) and number of project sites*/
/*site tags one observation per settlement, so its raw sum counts settlements*/
/*NOTE(review): contents() belongs to the table syntax used before Stata 17; newer releases may need a version prefix - confirm*/
qui egen site=tag(settlement_code);
table treatment, contents(freq rawsum site);

* define full set of baseline covariates;
/*NOTE(review): the macro X is not referenced anywhere else in the visible part of this file.
	It lists child_age_mths, whereas the parsimonious list further down uses child_age_mths_dob;
	confirm which age variable is intended before reusing X.
	It also names headhh_hrswrk_cat1-5 and mother_hrswrk_cat1-5, which are created earlier by tab with gen();
	how many of those indicators exist depends on the categories present in the data - TODO confirm there are five.*/
local X "child_age_mths child_female region2 child_preschlattend_baseline child_schlever_baseline child_mandinka child_fula
	child_jola child_serahule child_other hh_total mother_schl_ever mother_schl_yrs mother_readorwrite mother_married
	headhh_schl_ever headhh_schl_yrs headhh_readorwrite headhh_hrswrk_topcode headhh_hrswrk_cat1 headhh_hrswrk_cat2 
	headhh_hrswrk_cat3 headhh_hrswrk_cat4 headhh_hrswrk_cat5 headhh_work headhh_ag mother_hrswrk_topcode 
	mother_hrswrk_cat1 mother_hrswrk_cat2 mother_hrswrk_cat3 mother_hrswrk_cat4 mother_hrswrk_cat5 mother_work 
	mother_ag mother_unpaid expend_yr_hhpc_usd_wins dirtfloor thatchroof pc1 ecd_attend_baseline ecd_annex_baseline 
	ecd_wtp_baseline ecd_wtppctw_baseline child_sick_baseline child_bednet_baseline vaccinecount_baseline 
	vaccinepct_baseline vaccineany_baseline vitAcount_baseline vitApct_baseline vitAany_baseline booksinhh_none_bl 
	booksinhh_1to2_bl booksinhh_3to5_bl booksinhh_6plus_bl stimobjectscount_bl  stimobjectspct_bl stimplaycount_bl 
	stimplaypct_bl disc_nonvlnt_baseline disc_psychagg_baseline disc_vlnt_baseline discsevere_nonvlnt_baseline 
	discsevere_psychagg_baseline discsevere_vlnt_baseline incare_othchild_none_bl incare_othchild_some_bl 
	incare_othchild_occas_bl incare_othchild_most_bl incare_alone_none_bl incare_alone_some_bl incare_alone_occas_bl 
	incare_alone_most_bl funtime_never_bl funtime_some_bl funtime_often_bl funtime_always_bl mother_health1_baseline 
	mother_health_baseline finemotor_baseline_miss finemotor_baseline zfinemotor_baseline zfinemotor_adj_baseline
	langhear_baseline_miss langhear_baseline zlanghear_baseline zlanghear_adj_baseline haz_dob_baseline waz_dob_baseline 
	whz_dob_baseline bmiz_dob_baseline";

* define parsimonious set of covariates (macro Xs);
/*Xs is the variable list passed to every orth_out call in the balance tests that follow.
	It includes the top-coded hours, winsorized expenditure and willingness-to-pay share built earlier in this file.*/
local Xs "child_age_mths_dob child_female region2 ecd_attend_baseline zfinemotor_baseline zlanghear_baseline
	zfinemotor_adj_baseline zlanghear_adj_baseline haz_dob_baseline
	hh_total mother_schl_yrs headhh_schl_yrs headhh_work headhh_hrswrk_topcode headhh_ag expend_yr_hhpc_usd_wins pc1  
	ecd_wtppctw_baseline vaccinepct_baseline child_sick_baseline mother_health_baseline stimobjectspct_bl 
	discsevere_vlnt_baseline";
	
* BALANCE TESTS;
* get unadjusted differences (clustering standard errors by settlement), but adjust p-values for regional stratification;
* analyze community-based ECD and ECD Annex experiments separately;
* can verify that output is identical to regression-based version of gambia_ecd_balance[6-7].do; 

/*Output layout: table3a.xls holds the ECD Annex experiment, table3b.xls the community-based experiment.
	Each workbook is first written by the unadjusted call (replace) and then revisited by the adjusted call,
	which passes happend together with replace and adds region2 as a covariate plus the test option.
	File paths use forward slashes, which Stata accepts on every operating system; backslashes only work on Windows.
	orth_out is a user-written command and must be installed before running this section.*/

* unadjusted means & differences by treatment status;
/*account for correlated outcomes by settlement by clustering at that level*/
/*ECD Annex treated v. ECD Annex control*/
orth_out `Xs' if ECDAnnex_control==1|ECDAnnex_treated==1 using `output'/gambia_ecd_edcc_table3a.xls, 
	by(treatment) se vce(cluster settlement_code) compare count colnum title("unadjusted means") replace;

/*community-based v. pure control*/
orth_out `Xs' if purecontrol==1|communitybased==1 using `output'/gambia_ecd_edcc_table3b.xls, 
	by(treatment) se vce(cluster settlement_code) compare count colnum title("unadjusted means") replace;
	
* adjusted means, adjusting for regional stratification; 
/*ECD Annex treated v. ECD Annex control*/
orth_out `Xs' if ECDAnnex_control==1|ECDAnnex_treated==1 using `output'/gambia_ecd_edcc_table3a.xls, 
	by(treatment) se vce(cluster settlement_code) compare test count colnum covar(region2) title("adjusted means") 
	happend replace;

/*community-based v. pure control*/
orth_out `Xs' if purecontrol==1|communitybased==1 using `output'/gambia_ecd_edcc_table3b.xls, 
	by(treatment) se vce(cluster settlement_code) compare test count colnum covar(region2) title("adjusted means") 
	happend replace;

/*show the stored start time and the current time, then close the log*/
local end "$S_TIME";
display "`start'";
display "`end'";
log close;
